1 Aggregated and atomic scores per method



# datasets = read_yaml("datasets.yml") 
# print(score_file)

# datasets = read_yaml("datasets.yml") 
# datasets = read_yaml(file_dataset) 


# Locate the late-integration score files written by the workflow.
# `score_files` is a list holding one character vector of file paths; the rest
# of the script reads it as `score_files[[1]]`.
list_wd <- strsplit(getwd(), "/")[[1]]

# identical() stays a single TRUE/FALSE even when the split path is empty,
# whereas `==` would hand a zero-length condition to `if`.
if (identical(list_wd[length(list_wd)], "hadaca3_framework")) {
  # Snakemake script: the current working dir is hadaca3_framework
  score_files <- list(list.files(path = "./output/scores/", full.names = TRUE))
} else {
  # Nextflow script: the score files sit in the current working directory.
  # `pattern` is a regular expression, not a shell glob: the former
  # 'score-li*' meant "score-l followed by any number of i" anywhere in the
  # name. Anchor the prefix instead.
  score_files <- list(list.files(pattern = "^score-li"))
}


# Empty template for the late-integration results: one text column per part of
# the score file name, followed by one numeric column per metric.
label_columns <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)
metric_columns <- c(
  "aid", "aid_norm",
  "aitchison", "aitchison_norm",
  "jsd", "jsd_norm",
  "mae", "mae_norm",
  "pearson_col", "pearson_col_norm",
  "pearson_row", "pearson_row_norm",
  "pearson_tot", "pearson_tot_norm",
  "rmse", "rmse_norm",
  "score_aggreg",
  "sdid", "sdid_norm",
  "spearman_col", "spearman_col_norm",
  "spearman_row", "spearman_row_norm",
  "spearman_tot", "spearman_tot_norm"
)

# Zero-length vectors give a data frame with all the columns and no rows.
empty_columns <- c(
  setNames(rep(list(character()), length(label_columns)), label_columns),
  setNames(rep(list(numeric()), length(metric_columns)), metric_columns)
)
results_li <- do.call(data.frame, empty_columns)


# Read every score file and add one row per file to results_li: the pipeline
# components encoded in the file name, followed by the scores stored in the
# file.

# Names given to the 15 capture groups of the file-name pattern, in order.
component_names <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

#        dt   ref  OMIC  ppmR fsmR omic ppR fsR omic  ppSR fsSR  deR   omic  ppmM fsmM omic ppM  fsM  deM  li
# The dot of the extension is escaped so that it only matches a literal ".".
score_name_pattern <-
  "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5"

# Build all the one-row data frames first and bind them once afterwards:
# calling rbind() inside the loop copies the accumulated frame for every file.
score_rows <- lapply(score_files[[1]], function(score_file) {
  # Element 1 of the match is the whole string; 2:16 are the capture groups.
  # They are all NA when the file name does not follow the pattern.
  components <- str_match(basename(score_file), score_name_pattern)[2:16]
  labels <- as.data.frame(
    as.list(setNames(components, component_names)),
    stringsAsFactors = FALSE
  )
  cbind(labels, read_hdf5(score_file))
})

# results_li goes first so that its columns stay the reference for rbind().
results_li <- do.call(rbind, c(list(results_li), score_rows))
rownames(results_li) <- NULL

# Number of files processed; the original loop counted them in `i`.
# NOTE(review): nothing visible here reads `i` — drop it if no later chunk does.
i <- length(score_files[[1]])

# Median aggregated score of each late-integration method, best first.
# The last expression is left unassigned so that the table is printed.
li_medians <- summarise(
  group_by(results_li, late_integration),
  GlobalScore = median(score_aggreg)
)
arrange(li_medians, desc(GlobalScore))
#> # A tibble: 3 × 2
#>   late_integration GlobalScore
#>   <chr>                  <dbl>
#> 1 OnlyMet                0.663
#> 2 limeanRMSE             0.660
#> 3 OnlyRna                0.646


# Median aggregated score of every complete pipeline (one group per
# combination of the 13 function columns), sorted from best to worst.
# `.groups` is an argument of summarise(): given to group_by() it was taken as
# a new grouping column named `.groups` holding the constant "keep", and
# summarise() still printed its regrouping message.
results_li_arrange <- results_li %>%
  group_by(
    preprocessing_mixRNA, feature_selection_mixRNA,
    preprocessing_RNA, feature_selection_RNA,
    preprocessing_scRNA, feature_selection_scRNA, deconvolution_rna,
    preprocessing_mixMET, feature_selection_mixMET,
    preprocessing_MET, feature_selection_MET, deconvolution_met,
    late_integration
  ) %>%
  summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
  arrange(desc(GlobalScore))




# Convert the data identifiers to factors. unique() keeps the values in order
# of first appearance, so the levels follow the row order of results_li.
all_data_used <- c("dataset", "ref")
results_li[all_data_used] <- lapply(
  results_li[all_data_used],
  function(column) factor(column, levels = unique(column))
)



all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA", "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET", "deconvolution_met",
  "late_integration"
)

# Rows of the in vitro dataset, ranked from the best to the worst aggregated
# score. The ranking is expressed as row numbers of results_li: order() on the
# subset returns positions within the subset, which must not be used to index
# the full columns.
invitro_rows <- which(results_li$dataset == "invitro1")
invitro_ranked <- invitro_rows[
  order(results_li$score_aggreg[invitro_rows], decreasing = TRUE)
]

# Turn every function column into a factor whose levels are sorted by the
# results on the in vitro dataset.
for (fun in all_functions_li) {
  fun_values <- as.character(results_li[[fun]])
  ranked_levels <- unique(fun_values[invitro_ranked])
  # Values never used on the in vitro dataset keep a level (placed last)
  # instead of silently turning into NA.
  fun_levels <- c(ranked_levels, setdiff(unique(fun_values), ranked_levels))
  results_li[[fun]] <- factor(
    fun_values,
    levels = fun_levels[!is.na(fun_levels)]
  )
}



# Interactive table of the pipeline functions and the aggregated score.
index_aggreg <- which(names(results_li) == "score_aggreg")

# The function columns are selected by position, starting at column 3, so the
# first two columns of results_li are left out of the table.
function_columns <- seq_along(all_functions_li) + 2

# Button that lets the reader show or hide every column except the first.
colvis_button <- list(
  extend = "colvis",
  text = "Show/Hide Columns",
  columns = ":not(:first-child)"
)

datatable(
  results_li[, c(function_columns, index_aggreg)],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    dom = "Bfrtip",  # "B" places the Buttons extension in the layout
    buttons = list(colvis_button)
  )
)

2 Early integration table

#> # A tibble: 0 × 2
#> # ℹ 2 variables: early_integration <chr>, GlobalScore <dbl>

3 Visualisations of the top methods

3.1 top 5 best methods

3.2 top 5 worst methods

4 Visualisations of the different metrics

4.1 Paper figures

4.1.1 PP

4.1.2 FS

4.1.3 DE

4.1.4 LI

4.2 Aggregated scores

4.2.1 PP

4.2.2 FS

4.2.3 DE

4.2.4 LI

4.3 MAE

4.3.1 PP

4.3.2 FS

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.3 DE

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.3.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.4 RMSE

4.4.1 PP

4.4.2 FS

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.4.3 DE

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.4.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.5 Spearman correlation (row)

4.5.1 PP

4.5.2 FS

4.5.3 DE

4.5.4 LI

4.6 Aitchison distance

4.6.1 PP

4.6.2 FS

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.6.3 DE

#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 576 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

4.6.4 LI

#> Warning: Removed 626 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).